# -*- coding: utf-8 -*-
import scrapy
from scrapy import Request
from zc_core.model.items import Box
from zc_core.util.batch_gen import time_to_batch_no
from yzw.rules import *
from datetime import datetime
import json
from zc_core.spiders.base import BaseSpider


class SkuSpider(BaseSpider):
    """Spider that collects the category tree and the SKU lists of mro.yzw.cn.

    Flow: fetch the index page -> parse the categories -> for every level-3
    category POST to the search API for page 1 -> from page 1 fan out the
    requests for the remaining pages.
    """

    name = 'sku'
    # Index page; the category tree is parsed from its response.
    index_url = 'https://mro.yzw.cn/'
    # Product list (search) endpoint, queried with POST.
    sku_list_url = 'https://mro.yzw.cn/api/fastq/biz/v1/open/item/search'

    def __init__(self, batchNo=None, *args, **kwargs):
        """Initialise paging parameters and read the level-1 whitelist.

        :param batchNo: batch number forwarded to ``BaseSpider``.
        """
        # Imported explicitly so this class does not depend on the star import
        # from ``yzw.rules`` happening to re-export the helper.
        from scrapy.utils.project import get_project_settings

        super(SkuSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        self.page_size = 20
        # NOTE(review): not referenced anywhere in this class -- confirm
        # whether pagination was meant to be capped at this value.
        self.max_page_limit = 100
        self.settings = get_project_settings()
        # Optional collection of level-1 category names to restrict the crawl.
        self.SKU1_WHITE_LIST = self.settings.get('SKU1_WHITE_LIST')

    def _build_list_req(self, url, catalog1Id, catalog1Name, catalog2Id, catalog2Name, catalog3Id, catalog3Name, page):
        """Build the POST request for one page of a level-3 category's SKU list.

        The three category levels and the page number travel in ``meta`` so
        that the callback can tag the parsed SKUs and drive pagination.

        :param url: search endpoint to POST to.
        :param page: page number to request, sent as ``currentPage``.
        :return: a ``Request`` handled by ``parse_sku_content_deal``.
        """
        meta = {
            'reqType': 'sku',
            'batchNo': self.batch_no,
            'page': page,
            'catalog1Id': catalog1Id,
            'catalog1Name': catalog1Name,
            'catalog2Id': catalog2Id,
            'catalog2Name': catalog2Name,
            'catalog3Id': catalog3Id,
            'catalog3Name': catalog3Name,
        }
        # NOTE(review): cookie, digest and timestamp are fixed literals,
        # presumably captured from a browser session -- confirm the API keeps
        # accepting them over time.
        headers = {
            "content-type": "application/json;charset=UTF-8",
            "cookie": "area_code=129794; level_code=111512131163108173960001; UM_distinctid=1798262ab439cd-053c4ad1fe898b-7e697a60-1fa400-1798262ab44c73; Hm_lvt_fed06b1c6a8b498586c618a1047d67bf=1621390175; CNZZDATA1278877412=1223947834-1621385200-%7C1621390602; acw_tc=707c9fd816213938073802318e22f1dede0eff22f4630acc2c7c51b88e8e64; Hm_lpvt_fed06b1c6a8b498586c618a1047d67bf=1621393827",
            "origin": "https://mro.yzw.cn",
            "pragma": "no-cache",
            "referer": "https://mro.yzw.cn/search/product?currentPage=2&frontcateIds=104614",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36 Edg/90.0.818.62",
            "x-request-digest": "048387205d9d4b6b892b14381d79891fa",
            "x-request-timestamp": "1621393867072",
            "x-site-client-type": "1",
        }
        payload = {
            "condition": {
                "categories": [{"categoryIds": [catalog3Id], "field": "category"}],
                "areaCode": "1115121311631081",
            },
            "currentPage": page,
            "pageSize": self.page_size,
        }
        return Request(
            method='POST',
            url=url,
            meta=meta,
            headers=headers,
            body=json.dumps(payload),
            callback=self.parse_sku_content_deal,
            errback=self.error_back,
            dont_filter=True,
        )

    def start_requests(self):
        """Start the crawl by requesting the index page."""
        yield Request(
            url=self.index_url,
            meta={
                'reqType': 'sku',
                'batchNo': self.batch_no,
            },
            callback=self.parse_total_page,
            errback=self.error_back,
            dont_filter=True,
        )

    def parse_total_page(self, response):
        """Emit the category list and request page 1 of every level-3 category.

        Yields a ``Box('catalog', ...)`` with all parsed categories, followed
        by one list request per level-3 category. When ``SKU1_WHITE_LIST`` is
        set, only categories whose level-1 name is in it are requested.
        Level-3 categories whose level-2 or level-1 ancestor is not present in
        the parsed list are logged and skipped.
        """
        cats = parse_catalog(response)
        if not cats:
            return

        self.logger.info('品类: count[%s]' % len(cats))
        yield Box('catalog', self.batch_no, cats)

        # Index the categories once instead of scanning the whole list three
        # times per level-3 entry. ``setdefault`` keeps the first occurrence
        # of an id, matching a "first match wins" linear search.
        cat_by_id = {}
        for entry in cats:
            cat_by_id.setdefault(entry.get('catalogId'), entry)

        for cat in cats:
            if cat.get('level') != 3:
                continue

            catalog3Id = cat.get('catalogId')
            catalog3Name = cat.get('catalogName')
            catalog2Id = cat.get('parentId')

            level2 = cat_by_id.get(catalog2Id)
            catalog1Id = level2.get('parentId') if level2 is not None else None
            level1 = cat_by_id.get(catalog1Id) if level2 is not None else None
            if level2 is None or level1 is None:
                # An orphaned category must not abort the whole crawl.
                self.logger.warning('缺少上级品类: cat=%s, parent=%s' % (catalog3Id, catalog2Id))
                continue

            catalog2Name = level2.get('catalogName')
            catalog1Name = level1.get('catalogName')

            # An empty or unset whitelist means "crawl everything".
            if self.SKU1_WHITE_LIST and catalog1Name not in self.SKU1_WHITE_LIST:
                continue

            yield self._build_list_req(self.sku_list_url, catalog1Id, catalog1Name, catalog2Id,
                                       catalog2Name, catalog3Id, catalog3Name, 1)

    def parse_sku_content_deal(self, response):
        """Emit the SKUs of one list page and, from page 1, request the rest.

        Yields a ``Box('sku', ...)`` when the page contains SKUs. For page 1
        it additionally yields the requests for pages ``2..total_pages``.
        An empty page is only logged.
        """
        meta = response.meta
        catalog1Name = meta.get('catalog1Name')
        catalog1Id = meta.get('catalog1Id')
        catalog2Name = meta.get('catalog2Name')
        catalog2Id = meta.get('catalog2Id')
        catalog3Name = meta.get('catalog3Name')
        catalog3Id = meta.get('catalog3Id')
        cur_page = meta.get('page')

        sku_list = parse_sku(response)
        self.logger.info("清单: cat=%s, page=%s" % (catalog3Id, cur_page))
        if not sku_list:
            self.logger.info('空页: cat=%s, page=%s' % (catalog3Id, cur_page))
            return

        yield Box('sku', self.batch_no, sku_list)

        # Only the first page fans out, so later pages never re-trigger paging.
        if cur_page != 1:
            return

        # The bare name resolves to the module-level helper (presumably
        # provided by the star import from yzw.rules), not to the method of
        # the same name on this class.
        total_pages = parse_total_page(response)
        self.logger.info("清单1: cat=%s, total_page=%s" % (catalog3Id, total_pages))
        if not total_pages:
            # No usable page count: nothing more to request.
            return

        for page in range(2, total_pages + 1):
            yield self._build_list_req(self.sku_list_url, catalog1Id, catalog1Name, catalog2Id, catalog2Name,
                                       catalog3Id, catalog3Name, page)
